package com.cold.demo.jsoup.taobao;

import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.seimicrawler.xpath.JXDocument;
import org.seimicrawler.xpath.JXNode;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Small demo that fetches a Taobao search result page with jsoup and extracts
 * item links and image sources from it using XPath (JsoupXpath).
 *
 * <p>The request headers are kept as a raw, newline-separated block (as copied
 * from a browser's developer tools) and parsed into a map before the request
 * is sent.
 */
public class YunChuBaiJsoup {

    /** Search URL; the {@code q} parameter is a URL-encoded keyword. */
    private static final String SEARCH_URL = "https://s.taobao.com/search?q=%E8%BF%9E%E8%A1%A3%E8%A3%99";

    /**
     * Raw request headers, one {@code Name: value} pair per line.
     *
     * <p>NOTE(review): the Cookie line embeds what looks like a real session;
     * credentials should not live in source control — consider loading them
     * from configuration or the environment instead.
     *
     * <p>{@code br} is intentionally not advertised in Accept-Encoding: jsoup
     * only decodes gzip/deflate response bodies, so a brotli-encoded response
     * could not be parsed.
     */
    private static final String RAW_HEADERS =
            "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9\n" +
            "Accept-Encoding: gzip, deflate\n" +
            "Accept-Language: zh-CN,zh-TW;q=0.9,zh;q=0.8\n" +
            "Cache-Control: no-cache\n" +
            "Connection: keep-alive\n" +
            "Cookie: cna=t9SXGQWaLAsCAXU9AMNFdhNA; tracknick=tb2274741_2011; enc=VzpTzbRhAJruZ2RgdhJ%2F6Oob0ZlUMZKV3CEl4z2EMkp6Y1EVA3T6Tp0L9wSMupHLEy%2Fo0lzhRMGwS5zldzmefQ%3D%3D; hng=CN%7Czh-CN%7CCNY%7C156; thw=cn; birthday_displayed=1; t=13b8b52b527e590fa9994bd9df10b448; sgcookie=E100BukPOPgoLsWWTRCiRMzVGcNyXnUHI6DK43iVj2DAHveToE7DsK2%2FVhte90bU4AVi2b2ZMnbcIK9%2FxmV9CPHUOXqpsIQwbcl3m5OTL5LBzhQ%3D; uc3=id2=VAYgivNwiHKz&nk2=F5RHo3xjyaSHQBOr5%2F4%3D&vt3=F8dCujaJqev9z9hiREc%3D&lg2=W5iHLLyFOGW7aA%3D%3D; lgc=tb2274741_2011; uc4=id4=0%40Vh%2ByzE1m%2FyfNLcebM5cm8ZLAGCk%3D&nk4=0%40FY4MsTY%2BwA40TiPJ1qLBWsXBe7hGE7fiPA%3D%3D; _cc_=UtASsssmfA%3D%3D; mt=ci=-1_0; _tb_token_=e731ee46533e3; _m_h5_tk=5e3a3d7536e73a253522f53f3628cc5b_1633689000183; _m_h5_tk_enc=6dec9879abecb5a623979bfb108af446; xlly_s=1; alitrackid=www.taobao.com; lastalitrackid=www.taobao.com; cookie2=1000418352bea66ec83575aabb5726db; JSESSIONID=BE63A637D6937125AEB222009FD57BFD; uc1=cookie14=Uoe3dPunxKur8w%3D%3D; isg=BLa23H9Uy16BIr93bcN3jEOaB-y41_oRLFLzoSCfTBn8Y1b9iGXBIFHRfz8PS_Ip; tfstk=cNdfByiNcoqjXWDagEgzbPqYgh51az4C5xsDldMyDKpYzmYc2sf3TMw1odqTw9Q5.; l=eBjhAZ-PgTn13NUEBO5anurza77OfIRb4sPzaNbMiInca6tf9BdsRNCLKioJWdtjgtCnCetrd8LeqdLHR3cDiNAJz3h2q_rt3xvO.\n" +
            "Host: s.taobao.com\n" +
            "Pragma: no-cache\n" +
            "sec-ch-ua: \"Google Chrome\";v=\"93\", \" Not;A Brand\";v=\"99\", \"Chromium\";v=\"93\"\n" +
            "sec-ch-ua-mobile: ?0\n" +
            "sec-ch-ua-platform: \"Windows\"\n" +
            "Sec-Fetch-Dest: document\n" +
            "Sec-Fetch-Mode: navigate\n" +
            "Sec-Fetch-Site: none\n" +
            "Sec-Fetch-User: ?1\n" +
            "Upgrade-Insecure-Requests: 1\n" +
            "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36";

    /**
     * Fetches the search page, runs two XPath queries against it and prints
     * the extracted link and image attributes to standard output.
     *
     * @param args unused
     * @throws IOException if the HTTP request fails
     */
    public static void main(String[] args) throws IOException {
        Connection connection = Jsoup.connect(SEARCH_URL);
        connection.headers(parseHeaders(RAW_HEADERS));

        Document document = connection.get();
        JXDocument xpathDocument = JXDocument.create(document);

        // href / src attributes of the anchors and images inside <div class="pic">.
        List<Object> itemLinks = xpathDocument.sel("//div[@class=\"pic\"]/a/@href");
        List<Object> imageSources = xpathDocument.sel("//div[@class=\"pic\"]/a/img/@src");

        System.out.println("links (" + itemLinks.size() + "):");
        for (Object link : itemLinks) {
            System.out.println("  " + link);
        }
        System.out.println("images (" + imageSources.size() + "):");
        for (Object source : imageSources) {
            System.out.println("  " + source);
        }
    }

    /**
     * Parses a newline-separated block of {@code Name: value} lines into a map.
     *
     * <p>Each line is split at its first colon only, so values that themselves
     * contain colons are preserved intact. Name and value are trimmed. Lines
     * without a colon, or with an empty name, are skipped. If a name occurs
     * more than once, the last occurrence wins.
     *
     * @param rawHeaders the raw header block; may be {@code null}
     * @return a mutable map from header name to header value, never {@code null}
     */
    private static Map<String, String> parseHeaders(String rawHeaders) {
        Map<String, String> parsed = new HashMap<>();
        if (rawHeaders == null) {
            return parsed;
        }
        for (String line : rawHeaders.split("\n")) {
            int colon = line.indexOf(':');
            if (colon <= 0) {
                // No separator (or empty name): not a usable header line.
                continue;
            }
            String name = line.substring(0, colon).trim();
            String value = line.substring(colon + 1).trim();
            if (!name.isEmpty()) {
                parsed.put(name, value);
            }
        }
        return parsed;
    }
}
